package quicklunch.e2.goodies.utils;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
/**
 * Minimal CSV reading utilities: a character-level tokenizer
 * ({@link CSVTokenizer}) and row-oriented {@code parse} entry points that hand
 * every parsed line to an {@link IExecutor} callback.
 *
 * <p>Fields may be unquoted, double-quoted or single-quoted; inside a quoted
 * field the quote character is escaped by doubling it. Lines may be terminated
 * by CRLF, a lone CR or a lone LF.
 *
 * @author nakawakashigeto
 */
public abstract class CSVUtils {

    /**
     * Callback driven by the {@code parse} methods.
     */
    public interface IExecutor {
        /**
         * Called once before any input is read.
         */
        public void pre();

        /**
         * Called once per parsed line.
         *
         * @param row  zero-based index of the line
         * @param line field values of the line, in order of appearance
         */
        public void exec(long row, List<String> line);

        /**
         * Called once after parsing finished, also when parsing failed with
         * an exception.
         */
        public void post();
    }

    /**
     * Convenience base class with no-op implementations of every
     * {@link IExecutor} method, so subclasses override only what they need.
     */
    public abstract static class AbstractExecutor implements IExecutor {
        @Override
        public void pre() {
        }

        @Override
        public void exec(long row, List<String> line) {
        }

        @Override
        public void post() {
        }
    }

    // ===================

    /**
     * Token types produced by {@link CSVTokenizer}.
     */
    public enum TT {
        EOF("EOF"), FIELD("FIELD"), COMMA("COMMA"), CRLF("CRLF"), CR("CR"), LF(
                "LF");

        final String s;

        TT(String s) {
            this.s = s;
        }

        @Override
        public String toString() {
            return s;
        }
    }

    /**
     * A single lexical token: its type plus the text collected for it.
     *
     * @author nakawakashigeto
     */
    public static class Token {
        TT type;
        public StringBuilder val = new StringBuilder();

        /**
         * Sets the token type.
         *
         * @param type type to assign
         * @return this token, to allow call chaining
         */
        public Token build(TT type) {
            this.type = type;
            return this;
        }

        /**
         * Appends one character, given as the {@code int} returned by a
         * reader, to the token text.
         *
         * @param ch character to append
         */
        public void append(int ch) {
            this.val.append((char) ch);
        }

        /**
         * Appends a string to the token text.
         *
         * @param s text to append
         */
        public void append(String s) {
            this.val.append(s);
        }

        @Override
        public String toString() {
            return "T:[" + type + "] V:[" + val + "]";
        }
    }

    /**
     * Splits CSV text into {@link Token}s, one per call to {@link #token()}.
     *
     * @author nakawakashigeto
     */
    public static class CSVTokenizer {

        PushbackReader reader;

        static final int DQUOTE = '"';
        static final int QUOTE = '\'';
        static final int COMMA = ',';
        static final int EOF = -1;
        static final int CR = '\r';
        static final int LF = '\n';

        /*
         * STATE constants. token() no longer uses them; they are retained so
         * that same-package code referring to them keeps compiling.
         */
        static final int ST_nonescaped = 1;
        static final int ST_escaped = 2;
        static final int ST_escaped_single_quote = 3;

        /**
         * Creates a tokenizer over an in-memory string.
         *
         * @param s CSV text to tokenize
         */
        public CSVTokenizer(String s) {
            this.reader = new PushbackReader(new BufferedReader(
                    new StringReader(s)));
        }

        /**
         * Creates a tokenizer over a byte stream decoded with the platform
         * default charset.
         *
         * @param inputStream CSV bytes to tokenize
         */
        public CSVTokenizer(InputStream inputStream) {
            this.reader = new PushbackReader(new BufferedReader(
                    new InputStreamReader(inputStream)));
        }

        /**
         * Creates a tokenizer over a byte stream decoded with the named
         * charset.
         *
         * @param inputStream  CSV bytes to tokenize
         * @param charasetname name of the charset used for decoding
         * @throws UnsupportedEncodingException if the named charset is not
         *                                      supported
         */
        public CSVTokenizer(InputStream inputStream, String charasetname)
                throws UnsupportedEncodingException {
            this.reader = new PushbackReader(new BufferedReader(
                    new InputStreamReader(inputStream, charasetname)));
        }

        /**
         * Reads the next token from the input.
         *
         * <p>The first character decides the token kind: end of input gives
         * {@link TT#EOF}; a comma gives {@link TT#COMMA} (its text is ",");
         * CR LF, CR or LF give {@link TT#CRLF}, {@link TT#CR} or
         * {@link TT#LF}; a double or single quote starts a quoted
         * {@link TT#FIELD}; anything else starts an unquoted
         * {@link TT#FIELD}.
         *
         * @return the next token, never {@code null}
         * @throws IOException if reading the underlying input fails
         */
        public Token token() throws IOException {
            Token token = new Token();
            int ch = read();

            switch (ch) {
            case EOF:
                return token.build(TT.EOF);

            case DQUOTE:
            case QUOTE:
                // The opening quote is dropped; it also acts as the closer.
                return readQuoted(token.build(TT.FIELD), ch);

            case COMMA:
                token.append(ch);
                return token.build(TT.COMMA);

            case CR: {
                int next = read();
                if (next == LF) {
                    // default CRLF
                    return token.build(TT.CRLF);
                }
                // support a lone CR: give back the character after it
                unread(next);
                return token.build(TT.CR);
            }

            case LF:
                // support a lone LF
                return token.build(TT.LF);

            default:
                return readUnquoted(token.build(TT.FIELD), ch);
            }
        }

        /**
         * Collects an unquoted field: {@code first} plus every following
         * character up to (not including) end of input, CR, LF, a double
         * quote or a comma. The terminating character is pushed back.
         */
        private Token readUnquoted(Token token, int first) throws IOException {
            token.append(first);

            int ch;
            while ((ch = read()) != EOF && isTextdata(ch)) {
                token.append(ch);
            }
            unread(ch); // no-op at end of input
            return token;
        }

        /**
         * Collects a quoted field whose opening {@code quote} was already
         * consumed. A doubled quote yields one literal quote character; a
         * single quote ends the field. An unterminated field ends at end of
         * input.
         */
        private Token readQuoted(Token token, int quote) throws IOException {
            while (true) {
                int ch = read();
                if (ch == EOF) {
                    return token;
                }
                if (ch == quote) {
                    int next = read();
                    if (next != quote) {
                        // closing quote: whatever follows belongs to the
                        // next token
                        unread(next);
                        return token;
                    }
                    // doubled quote: falls through and appends one quote
                }
                token.append(ch);
            }
        }

        /**
         * Tells whether {@code ch} may be part of an unquoted field, i.e. it
         * is none of CR, LF, double quote or comma.
         */
        boolean isTextdata(int ch) {
            return notEq(ch, '\r') && notEq(ch, '\n') && notEq(ch, '"')
                    && notEq(ch, ',');
        }

        /**
         * Reads one character, or returns -1 when there is no reader or the
         * input is exhausted.
         */
        int read() throws IOException {
            if (reader != null) {
                return reader.read();
            }
            return -1;
        }

        boolean notEq(int l, int r) {
            return (l != r);
        }

        /**
         * Pushes one character back onto the input; does nothing for the
         * end-of-input marker (-1) or when there is no reader.
         */
        void unread(int ch) throws IOException {
            if (reader != null && ch != -1) {
                reader.unread(ch);
            }
        }

        /**
         * Closes the underlying reader, if any. Failures are ignored.
         */
        public void close() {
            if (reader == null) {
                return;
            }
            try {
                reader.close();
            } catch (IOException ignored) {
                // Best-effort close: this method declares no checked
                // exception, so a failure here is deliberately dropped.
            }
        }
    }

    /**
     * Parses CSV bytes and reports every line to {@code executor}.
     *
     * <p>{@code executor.pre()} is called first, {@code executor.exec} once
     * per line and {@code executor.post()} last, even when parsing fails.
     * An empty line in the middle of the input is reported as an empty list;
     * an empty line at the very end of the input is not reported. This method
     * does not close {@code inputStream}.
     *
     * @param inputStream  CSV bytes to parse
     * @param executor     callback receiving the parsed lines
     * @param charasetname name of the charset used to decode the bytes
     * @return number of lines reported to {@code executor}
     * @throws IOException if the charset is unsupported or reading fails
     */
    public static long parse(InputStream inputStream, IExecutor executor,
            String charasetname) throws IOException {
        executor.pre();
        try {
            return parseRows(new CSVTokenizer(inputStream, charasetname),
                    executor);
        } finally {
            executor.post();
        }
    }

    /**
     * Parses CSV bytes decoded with the charset named by the
     * {@code file.encoding} system property, falling back to the JVM default
     * charset when that property is not set.
     *
     * @param inputStream CSV bytes to parse
     * @param executor    callback receiving the parsed lines
     * @return number of lines reported to {@code executor}
     * @throws IOException if the charset is unsupported or reading fails
     */
    public static long parse(InputStream inputStream, IExecutor executor)
            throws IOException {
        String encoding = System.getProperty("file.encoding");
        if (encoding == null) {
            // Without this the reader would fail on a null charset name.
            encoding = java.nio.charset.Charset.defaultCharset().name();
        }
        return parse(inputStream, executor, encoding);
    }

    /**
     * Parses CSV text held in a string and reports every line to
     * {@code executor}; callbacks are invoked as described for
     * {@link #parse(InputStream, IExecutor, String)}.
     *
     * <p>The string is tokenized directly instead of being encoded to bytes
     * and decoded again, so its characters reach the executor unchanged.
     *
     * @param s        CSV text to parse
     * @param executor callback receiving the parsed lines
     * @return number of lines reported to {@code executor}
     * @throws IOException          if reading fails
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static long parse(String s, IExecutor executor)
            throws IOException {
        if (s == null) {
            // Fail before pre() is invoked, as a null string always did.
            throw new NullPointerException("s");
        }

        executor.pre();
        try {
            return parseRows(new CSVTokenizer(s), executor);
        } finally {
            executor.post();
        }
    }

    /**
     * Groups the tokens of {@code tokenizer} into lines and passes each line
     * to {@code executor.exec}; returns the number of lines passed.
     */
    private static long parseRows(CSVTokenizer tokenizer, IExecutor executor)
            throws IOException {
        long row = 0;
        Token token;

        do {
            List<String> line = new ArrayList<String>();
            // Type of the previous token on this line; null at line start.
            TT previous = null;

            while (!isLineEnd((token = tokenizer.token()).type)) {
                if (token.type == TT.COMMA) {
                    // A comma at line start or right after another comma
                    // closes an empty field.
                    if (previous != TT.FIELD) {
                        line.add("");
                    }
                } else {
                    line.add(token.val.toString());
                }
                previous = token.type;
            }

            // Nothing at all before end of input: no trailing empty line.
            if (previous == null && token.type == TT.EOF) {
                break;
            }

            // A line ending in a comma has one more, empty, field.
            if (previous == TT.COMMA) {
                line.add("");
            }

            executor.exec(row++, line);
        } while (token.type != TT.EOF);

        return row;
    }

    /**
     * Tells whether a token of the given type terminates a line.
     */
    private static boolean isLineEnd(TT type) {
        return type == TT.EOF || type == TT.CRLF || type == TT.CR
                || type == TT.LF;
    }

}
